library(ggmap)
library(ggplot2)
library(ggmap)
library(maps)
library(mapdata)
library(openintro)
library(plotly)
library(billboarder)
library(vctrs)
library(dplyr)
states <- read.csv("~/Downloads/states.csv")
Breweries <- read.csv("~/Downloads/Breweries.csv")
Beers <- read.csv("~/Downloads/Beers.csv")

Question 1 - How many Brewereries are in Each State?

dat = Breweries

#obtain the count of the number of breweres by state arranged in decending order
state_count = dat %>% group_by(State) %>% count() %>% arrange(-n)

Bar Chart of Brewery Count by State

billboarder() %>%
  bb_barchart(state_count) %>%
  bb_labs(title = "Count of Breweries by State")

Heatmap

#Here we are creating heatmap to show quanties. 
heat_map = ggplot(data = states) + 
  geom_polygon(aes(x = long, y = lat, fill = n, group = group), color = "white") + 
  coord_fixed(1.3) +
  scale_fill_gradientn(colours = rev(heat.colors(10)),na.value = "grey90",
                       guide = guide_colourbar(barwidth = 5, barheight = 0.4,
                                               #put legend title on top of legend
                                               title.position = "top")) + 
  labs(title = "Count of Breweries by State (Continental US)")
  
ggplotly(heat_map)

Question 2 - Merge data by Brew ID, and check first 6 and last 6 obs.

#rename col to match Brewery Data set
colnames(Beers)[5]="Brew_ID"

#merge data by Brew_ID 
final=merge(Beers,Breweries,by=c("Brew_ID"))

head(final,n=6)
  Brew_ID        Name.x Beer_ID   ABV IBU                               Style
1       1  Get Together    2692 0.045  50                        American IPA
2       1 Maggie's Leap    2691 0.049  26                  Milk / Sweet Stout
3       1    Wall's End    2690 0.048  19                   English Brown Ale
4       1       Pumpion    2689 0.060  38                         Pumpkin Ale
5       1    Stronghold    2688 0.060  25                     American Porter
6       1   Parapet ESB    2687 0.056  47 Extra Special / Strong Bitter (ESB)
  Ounces             Name.y        City State
1     16 NorthGate Brewing  Minneapolis    MN
2     16 NorthGate Brewing  Minneapolis    MN
3     16 NorthGate Brewing  Minneapolis    MN
4     16 NorthGate Brewing  Minneapolis    MN
5     16 NorthGate Brewing  Minneapolis    MN
6     16 NorthGate Brewing  Minneapolis    MN
tail(final,n=6)
     Brew_ID                    Name.x Beer_ID   ABV IBU
2405     556             Pilsner Ukiah      98 0.055  NA
2406     557  Heinnieweisse Weissebier      52 0.049  NA
2407     557           Snapperhead IPA      51 0.068  NA
2408     557         Moo Thunder Stout      50 0.049  NA
2409     557         Porkslap Pale Ale      49 0.043  NA
2410     558 Urban Wilderness Pale Ale      30 0.049  NA
                       Style Ounces                        Name.y          City
2405         German Pilsener     12         Ukiah Brewing Company         Ukiah
2406              Hefeweizen     12       Butternuts Beer and Ale Garrattsville
2407            American IPA     12       Butternuts Beer and Ale Garrattsville
2408      Milk / Sweet Stout     12       Butternuts Beer and Ale Garrattsville
2409 American Pale Ale (APA)     12       Butternuts Beer and Ale Garrattsville
2410        English Pale Ale     12 Sleeping Lady Brewing Company     Anchorage
     State
2405    CA
2406    NY
2407    NY
2408    NY
2409    NY
2410    AK

Question 3 - How to deal with Missing Values

#Of the missing values, 1005 are missing for "IBU", 5 missing values for "Style"", and 62 missing values for ABV, and 1,005 missing values for IBU.  


#One method would be to omit the observations with missing data which isn't ideal since we will lose quite a few observations

omita=na.omit(final, cols = "ABV")
omiti=na.omit(final, cols = "IBU")

#The other method would be to impute the median values of the variable to the missing variables.

#Find Medians of Style, IBU, and ABV

Question 4 - What is the median alcohol content and international bitterness unit for each state?

#find median of ABV and IBU
 omita%>%group_by(State)%>%summarise(median(ABV),median(IBU))
# A tibble: 50 x 3
   State `median(ABV)` `median(IBU)`
   <fct>         <dbl>         <dbl>
 1 " AK"        0.057           46  
 2 " AL"        0.06            43  
 3 " AR"        0.04            39  
 4 " AZ"        0.0575          20.5
 5 " CA"        0.058           42  
 6 " CO"        0.065           40  
 7 " CT"        0.061           29  
 8 " DC"        0.059           47.5
 9 " DE"        0.055           52  
10 " FL"        0.062           55  
# … with 40 more rows

Question 5 - Which state has the most bitter beer and highest ABV

When we exclude all of the missing values, we find that Maine has both the highest median of ABV and IBU amongst all states. THe highest ABV is London Balling from KY. The most bitter beer (highest IBU) is “Bitter Bitch Imperial IPA” from OR.

medianalc=as.data.frame(omita%>%group_by(State)%>%summarise(median(ABV)))
medianibu=as.data.frame(omiti%>%group_by(State)%>%summarise(median(IBU)))

                        
#label columns
colnames(medianalc)=c("State","ABV")
colnames(medianibu)=c("State","IBU")

#sort in descending order
medianalc = medianalc %>% arrange(-ABV)
medianibu = medianibu %>% arrange(-IBU)

#create charts

billboarder() %>%
  bb_barchart(medianalc) %>%
  bb_labs(title = "Median ABV of Breweries by State")
billboarder() %>%
  bb_barchart(medianibu) %>%
  bb_labs(title = "Median IBU of Breweries by State")
#Find beer with MAX ABV and MAX IBU
omita[omita$ABV==max(omita$ABV),] 
  Brew_ID         Name.x Beer_ID   ABV IBU              Style Ounces
8       2 London Balling    2685 0.125  80 English Barleywine     16
                     Name.y       City State
8 Against the Grain Brewery Louisville    KY
omiti[omiti$IBU==max(omiti$IBU),] 
     Brew_ID                    Name.x Beer_ID   ABV IBU
1857     375 Bitter Bitch Imperial IPA     980 0.082 138
                              Style Ounces                  Name.y    City
1857 American Double / Imperial IPA     12 Astoria Brewing Company Astoria
     State
1857    OR

Question 6 - Comment on Distribution of ABV and summary statistics

We see that the distribution to be right skewed (the mean of 0.05977 is greater than the median of 0.05600) as seen in the histogram. We wee a range of 0.001 - 0.128 in the data for ABV. There are 62 missing values that we exclude from the analysis.

summary(omita$ABV)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.02700 0.05000 0.05700 0.05991 0.06800 0.12500 
final %>% ggplot(aes(ABV*100)) + geom_histogram() + xlab("Alcohol by Volume") + ggtitle("Distribution of Beer ABV") 

Question 7 - Is there a relationship between IBU and ABV?

Looking at the plot, there does appear to be an association between IBU and ABV. As the ABV increases, we see IBU tends to increase as well. Theres seems to be a direct positive relationship between the two variables. There are some extreme outliers with extremely high ABV seen in the far right of the plot.

ggplot(data=omita) + geom_point(aes(x=ABV,y=IBU)) + ggtitle ("IBU vs ABV (Excluding all Missing Values)")